Read prepared data.

# Load the prepared subscriptions table from disk. The path is relative to
# the notebook's working directory.
subscriptions <- read_rds('../data/subscriptions.rds')

# Print per-column summary statistics as a quick sanity check of the load.
summary(subscriptions)
   customerid       subscriptionid       periodend           revenuecurr        revenuecurrinclvat billingcurrency      startmonth            endmonth          isthreetoonesubs 
 Min.   :   10006   Min.   :     154   Min.   :2004-03-15   Min.   :      0.8   Min.   :      1    Length:2301011     Min.   :2003-12-01   Min.   :2004-03-01   Min.   :0.00000  
 1st Qu.: 1109200   1st Qu.: 5195644   1st Qu.:2013-04-10   1st Qu.:     15.0   1st Qu.:     15    Class :character   1st Qu.:2013-01-01   1st Qu.:2013-04-01   1st Qu.:0.00000  
 Median : 5404300   Median :15011583   Median :2015-04-03   Median :     63.2   Median :     79    Mode  :character   Median :2014-12-01   Median :2015-04-01   Median :0.00000  
 Mean   : 8160520   Mean   :14758473   Mean   :2014-09-14   Mean   :   1371.7   Mean   :   1393                       Mean   :2014-05-16   Mean   :2014-08-31   Mean   :0.03175  
 3rd Qu.:14665703   3rd Qu.:24306942   3rd Qu.:2016-11-19   3rd Qu.:    174.4   3rd Qu.:    218                       3rd Qu.:2016-08-01   3rd Qu.:2016-11-01   3rd Qu.:0.00000  
 Max.   :24113207   Max.   :29648411   Max.   :2020-03-27   Max.   :2823000.0   Max.   :2823000                       Max.   :2018-03-01   Max.   :2020-03-01   Max.   :1.00000  
                                                                                                                                                                                 
     months          status          num_previous_months num_previous_subs num_previous_months_binned firstpaiddate         channelcat        paymentperiodchosenatstart
 Min.   : 1.000   Length:2301011     Min.   :  0.00      Min.   : 0.00     Min.   : 0.00              Min.   :2003-12-15   Length:2301011     Min.   :-1.000            
 1st Qu.: 1.000   Class :character   1st Qu.:  1.00      1st Qu.: 1.00     1st Qu.: 1.00              1st Qu.:2011-01-01   Class :character   1st Qu.: 3.000            
 Median : 3.000   Mode  :character   Median :  8.00      Median : 3.00     Median : 8.00              Median :2013-07-01   Mode  :character   Median : 3.000            
 Mean   : 3.527                      Mean   : 15.33      Mean   : 6.02     Mean   :14.69              Mean   :2013-01-30                      Mean   : 4.242            
 3rd Qu.: 3.000                      3rd Qu.: 22.00      3rd Qu.: 8.00     3rd Qu.:26.00              3rd Qu.:2015-09-10                      3rd Qu.: 3.000            
 Max.   :24.000                      Max.   :162.00      Max.   :69.00     Max.   :39.00              Max.   :2018-03-28                      Max.   :24.000            
                                                                                                                                                                        
   currency          marketname         siteverkey        firstpaidmonth       firstdevice          segment          isquickpurchase  productversion       isfreemium    
 Length:2301011     Length:2301011     Length:2301011     Min.   :2003-12-01   Length:2301011     Length:2301011     Min.   :0.0000   Length:2301011     Min.   :0.0000  
 Class :character   Class :character   Class :character   1st Qu.:2011-01-01   Class :character   Class :character   1st Qu.:0.0000   Class :character   1st Qu.:0.0000  
 Mode  :character   Mode  :character   Mode  :character   Median :2013-07-01   Mode  :character   Mode  :character   Median :1.0000   Mode  :character   Median :0.0000  
                                                          Mean   :2013-01-16                                         Mean   :0.5291                      Mean   :0.2602  
                                                          3rd Qu.:2015-09-01                                         3rd Qu.:1.0000                      3rd Qu.:1.0000  
                                                          Max.   :2018-03-01                                         Max.   :1.0000                      Max.   :1.0000  
                                                                                                                     NA's   :274                         NA's   :274     
  model31224        threetoonestartdate  market_category    siteverkey_cat     siteverkey_cat2    chosen_subs_length isthreetoonestate  gdppercapita      gdppercapita_scaled
 Length:2301011     Min.   :2017-02-14   Length:2301011     Length:2301011     Length:2301011     Length:2301011     Min.   :0.00000   Min.   :   218.3   Min.   :-2.4363    
 Class :character   1st Qu.:2017-03-14   Class :character   Class :character   Class :character   Class :character   1st Qu.:0.00000   1st Qu.: 42013.3   1st Qu.:-0.4640    
 Mode  :character   Median :2017-04-18   Mode  :character   Mode  :character   Mode  :character   Mode  :character   Median :0.00000   Median : 55670.9   Median : 0.1805    
                    Mean   :2017-05-25                                                                               Mean   :0.04074   Mean   : 51846.7   Mean   : 0.0000    
                    3rd Qu.:2017-09-06                                                                               3rd Qu.:0.00000   3rd Qu.: 60637.3   3rd Qu.: 0.4148    
                    Max.   :2017-12-19                                                                               Max.   :1.00000   Max.   :108422.5   Max.   : 2.6698    
                    NA's   :2144362                                                                                                                                          
 subscription_summary subscription_summary_no_market
 Length:2301011       Length:2301011                
 Class :character     Class :character              
 Mode  :character     Mode  :character              
                                                    
                                                    
                                                    
                                                    
# Build the modelling table: keep subscriptions whose end month lies in
# [begin_train_window, end_window), then add a factor copy of the binned
# previous-months count, a training/validation label and a 0/1 churn flag.
# The window bounds are defined outside this chunk.
subscriptions_with_target <- subscriptions %>%
  # restrict to a recent expiry window
  filter(endmonth >= begin_train_window, endmonth < end_window) %>%
  mutate(
    num_previous_months_binned_fct = as.factor(num_previous_months_binned),
    # rows ending on or after begin_validation_window go to the validation set
    set_type = as.factor(
      if_else(endmonth >= begin_validation_window, 'validation', 'training')
    ),
    # 1 when the subscription status is 'churn', 0 otherwise
    churnind = ifelse(status == 'churn', 1, 0)
  )

Prepare the churn table (`churntable`) that we want to predict.

# Aggregate subscriptions into one row per combination of the grouping
# columns below, counting observations and churned subscriptions, then derive
# churn rates and a per-set observation weight for each row.
churntable <- subscriptions_with_target %>%
  group_by(
    set_type, siteverkey_cat2, market_category, months,
    num_previous_months_binned, chosen_subs_length,
    subscription_summary_no_market
  ) %>%
  summarise(
    num_obs = n(),
    churned = sum(churnind)
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest
  # explicitly instead of relying on the next group_by() to override it.
  ungroup() %>%
  # Regroup by set so that `weight` sums to 1 within training and within
  # validation separately.
  group_by(set_type) %>%
  mutate(
    churn_rate = churned / num_obs,
    renew_rate = 1 - churn_rate,
    # Per-month churn that compounds to `churn_rate` over `months` months.
    month_churn = 1 - renew_rate^(1 / as.double(months)),
    log_month_churn = log(month_churn),
    weight = num_obs / sum(num_obs)
  ) %>%
  # Return an ungrouped table so later verbs are not silently applied per set.
  ungroup()

# Rows with churn_rate == 0 have month_churn == 0 and log_month_churn == -Inf,
# so they are removed before fitting on log_month_churn.
# NB! Does this introduce a bad bias ????
# NOTE(review): dropping the zero-churn rows removes the lowest-churn cells,
# which would tend to push fitted churn upward -- confirm the impact.
churntable_no_zeros <- churntable %>%
  filter(churn_rate > 0)

Train model

# Fit a weighted GLM (glm's default gaussian family) of log monthly churn on
# market category and the market-free subscription summary, using only the
# training rows of the zero-free churn table.
new_model <- glm(
  log_month_churn ~ market_category + subscription_summary_no_market,
  data = churntable_no_zeros[churntable_no_zeros$set_type == 'training', ],
  weights = weight
)

# Tag the fit with the 'log_month_churn' class before saving, as in the
# notebook source embedded at the end of this file.
# NOTE(review): presumably downstream prediction code dispatches on this
# class to map predictions back from the log scale -- confirm.
class(new_model) <- c('log_month_churn', class(new_model))
write_rds(new_model, '../data/models/churn_model.rds')

Model validation for training (2017-01-01 - 2017-08-01) and validation (2017-09-01 - 2018-01-01) sets:

# Build the prediction table for the log-monthly-churn model before plotting;
# without this assignment `prediction_table` is undefined at this point. The
# call mirrors the notebook source embedded at the end of this file and the
# logistic-model section below.
# validation(), predict_2fct_model and validation_plots() are project helpers
# defined outside this chunk.
prediction_table <- validation(
  subscriptions_with_target, new_model, predict_2fct_model
)
validation_plots(prediction_table, minimal_share = 0.01)
NAs introduced by coercion

Try simple logistic model

# Baseline for comparison: a row-level logistic regression of the 0/1 churn
# indicator, fitted on the training rows only.
# NOTE(review): num_previous_months_binned enters the formula as-is; the
# factor copy num_previous_months_binned_fct created earlier is not used
# here -- confirm that this is intended.
model_logit <- glm(
  formula = churnind ~ market_category + siteverkey_cat2 +
    num_previous_months_binned + months + chosen_subs_length,
  family = 'binomial',
  data = subscriptions_with_target[
    subscriptions_with_target$set_type == 'training',
  ]
)

Model validation for training (2017-01-01 - 2017-08-01) and validation (2017-09-01 - 2018-01-01) sets:

# Build the prediction table for the logistic model with the project helper
# validation() (defined outside this chunk). No custom predict function is
# passed here, unlike the call in the embedded notebook source for new_model.
prediction_table_logit <- validation(subscriptions_with_target, model_logit)

# Draw the validation plots for the logistic model.
# NOTE(review): the meaning of minimal_share = 0.01 is not visible here;
# presumably a minimum segment share for inclusion in the plots -- confirm.
validation_plots(prediction_table_logit, minimal_share = 0.01)
NAs introduced by coercion

LS0tCnRpdGxlOiAiQ2h1cm46IFR3byBkaW1lbnNpb25zIGFuZCBwcmVkaWN0IGxvZyBvZiBtb250aGx5IGNodXJuIHByb2JhYmlsaXR5IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Kc291cmNlKCdjb25maWcuUicpCnNvdXJjZSgndXRpbHMuUicpCnNvdXJjZSgndXRpbHNfdmFsaWRhdGlvbi5SJykKCiMgc291cmNlKCduZXdfbGlmZXRpbWVwcmVkaWN0b3IuUicpCmBgYAoKUmVhZCBwcmVwYXJlZCBkYXRhLgoKYGBge3IgcmVhZGluZywgdGlkeT1GfQpzdWJzY3JpcHRpb25zIDwtIHJlYWRfcmRzKCcuLi9kYXRhL3N1YnNjcmlwdGlvbnMucmRzJykKYGBgCgpgYGB7cn0Kc3VtbWFyeShzdWJzY3JpcHRpb25zKQpgYGAKCmBgYHtyfQpzdWJzY3JpcHRpb25zX3dpdGhfdGFyZ2V0IDwtIHN1YnNjcmlwdGlvbnMgJT4lCiAgIyByZXN0cmljdCB0byBhIHJlY2VudCBleHBpcnkgd2luZG93CiAgZmlsdGVyKGVuZG1vbnRoID49IGJlZ2luX3RyYWluX3dpbmRvdyAmIGVuZG1vbnRoIDwgZW5kX3dpbmRvdykgJT4lCiAgbXV0YXRlKG51bV9wcmV2aW91c19tb250aHNfYmlubmVkX2ZjdCA9IGFzLmZhY3RvcihudW1fcHJldmlvdXNfbW9udGhzX2Jpbm5lZCkpICU+JQogIG11dGF0ZShzZXRfdHlwZSA9IGFzLmZhY3RvcihpZl9lbHNlKGVuZG1vbnRoID49IGJlZ2luX3ZhbGlkYXRpb25fd2luZG93LCAndmFsaWRhdGlvbicsICd0cmFpbmluZycpKSkgJT4lCiAgCiAgbXV0YXRlKGNodXJuaW5kID0gaWZlbHNlKHN0YXR1cyA9PSAnY2h1cm4nLCAxLCAwKSkgCmBgYAoKClByZXBhcmUgY2h1cm50YWJsZSB0aGF0IHdlIHdhbnQgdG8gcHJlZGljdC4KCmBgYHtyfQpjaHVybnRhYmxlIDwtIHN1YnNjcmlwdGlvbnNfd2l0aF90YXJnZXQgJT4lCiAgCiAgZ3JvdXBfYnkoc2V0X3R5cGUsIHNpdGV2ZXJrZXlfY2F0MiwgbWFya2V0X2NhdGVnb3J5LCBtb250aHMsIG51bV9wcmV2aW91c19tb250aHNfYmlubmVkLCBjaG9zZW5fc3Vic19sZW5ndGgsIHN1YnNjcmlwdGlvbl9zdW1tYXJ5X25vX21hcmtldCkgJT4lCiAgc3VtbWFyaXNlKG51bV9vYnMgPSBuKCksIAogICAgICAgICAgICBjaHVybmVkID0gc3VtKGNodXJuaW5kKSkgJT4lCiAgCiAgZ3JvdXBfYnkoc2V0X3R5cGUpICU+JQogIG11dGF0ZShjaHVybl9yYXRlID0gY2h1cm5lZCAvIG51bV9vYnMsCiAgICAgICAgIHJlbmV3X3JhdGUgPSAxIC0gY2h1cm5fcmF0ZSwKICAgICAgICAgbW9udGhfY2h1cm4gPSAxIC0gcmVuZXdfcmF0ZSBeICgxL2FzLmRvdWJsZShtb250aHMpKSwKICAgICAgICAgbG9nX21vbnRoX2NodXJuID0gbG9nKG1vbnRoX2NodXJuKSwKICAgICAgICAgd2VpZ2h0ID0gbnVtX29icyAvIHN1bShudW1fb2JzKSkKCiMgTkIhIERvZXMgdGhpcyBpbnRyb2R1Y2UgYSBiYWQgYmlhcyA/Pz8/CmNodXJudGFibGVfbm9femVyb3MgPC0gY2h1cm50YWJsZSAlPiUKICBmaWx0ZXIoY2h1cm5fcmF0ZSA+IDApCmBgYAoKVHJhaW4gbW9kZWwKCmBgYHty
fQpuZXdfbW9kZWw9Z2xtKGxvZ19tb250aF9jaHVybiB+IG1hcmtldF9jYXRlZ29yeSArIHN1YnNjcmlwdGlvbl9zdW1tYXJ5X25vX21hcmtldCwgZGF0YT1jaHVybnRhYmxlX25vX3plcm9zW2NodXJudGFibGVfbm9femVyb3Mkc2V0X3R5cGUgPT0gJ3RyYWluaW5nJywgXSwgd2VpZ2h0cyA9IHdlaWdodCkKCmNsYXNzKG5ld19tb2RlbCkgPC0gYygnbG9nX21vbnRoX2NodXJuJywgY2xhc3MobmV3X21vZGVsKSkKd3JpdGVfcmRzKG5ld19tb2RlbCwgJy4uL2RhdGEvbW9kZWxzL2NodXJuX21vZGVsLnJkcycpCmBgYAoKTW9kZWwgdmFsaWRhdGlvbiBmb3IgdHJhaW5pbmcgKDIwMTctMDEtMDEgLSAyMDE3LTA4LTAxKSBhbmQgdmFsaWRhdGlvbiAoMjAxNy0wOS0wMSAtIDIwMTgtMDEtMDEpIHNldHM6CgoqIFN1bW1hcnkgdGFibGUgY29udGFpbmluZwogICAgKyBOdW1iZXIgb2Ygb2JzZXJ2YXRpb25zIHdpdGhvdXQgcHJlZGljdGlvbgogICAgKyBBVUMsIGxvZ2xvc3MgLSBwcmVkaWN0aW9uIHF1YWxpdHkgbWV0cmljcwoqIFJPQyBjdXJ2ZQoqIFBsb3RzIHBlciBtYXJrZXQKICAgICsgQWdlIG9mIGN1c3RvbWVycyB2cyByZWFsIGFuZCBwcmVkaWN0ZWQgcHJvYmFiaWxpdHkgb2YgY2h1cm4gZm9yIGRpZmZlcmVudCBzdWJzY3JpcHRpb24gbGVuZ3Rocy4gSXQgc2hvd3MgaWYgd2UgYXJlIGNvcnJlY3RseSBwcmVkaWN0aW5nIHByb2JhYmlsaXR5IG9mIGNodXJuIGZvciBjdXN0b21lcnMgb3ZlciBsaWZldGltZS4KICAgICsgQ2FsaWJyYXRpb24gLSBQcmVkaWN0ZWQgcHJvYmFiaWxpdHkgb2YgY2h1cm4gdnMgcmVhbCBwcm9iYWJpbGl0eSBvZiBjaHVybiBmb3IgZGlmZmVyZW50IHN1YnNjcmlwdGlvbiBsZW5ndGhzICh3ZWxsIGNhbGlicmF0ZWQgcHJlZGljdGlvbiBzaG91bGQgZm9ybSBhIGRpYWdvbmFsIGxpbmUpLiBTaG93cyBpZiBvdXRjb21lIG9mIG1vZGVsIGluIHF1ZXN0aW9uIGNhbiBiZSByZWFseSB0cmVhdGVkIGFzIHByb2JhYmlsaXR5LgoKYGBge3Igd2FybmluZz1GfQpwcmVkaWN0aW9uX3RhYmxlIDwtIHZhbGlkYXRpb24oc3Vic2NyaXB0aW9uc193aXRoX3RhcmdldCwgbmV3X21vZGVsLCBwcmVkaWN0XzJmY3RfbW9kZWwpCmBgYAoKYGBge3IgZmlnLmhlaWdodD0xMCwgZmlnLndpZHRoPTcsIHdhcm5pbmc9Rn0KdmFsaWRhdGlvbl9wbG90cyhwcmVkaWN0aW9uX3RhYmxlLCBtaW5pbWFsX3NoYXJlID0gMC4wMSkKYGBgCgpUcnkgc2ltcGxlIGxvZ2lzdGljIG1vZGVsCmBgYHtyfQptb2RlbF9sb2dpdCA8LSBnbG0oY2h1cm5pbmQgfiBtYXJrZXRfY2F0ZWdvcnkgKyBzaXRldmVya2V5X2NhdDIgKyBudW1fcHJldmlvdXNfbW9udGhzX2Jpbm5lZCArIG1vbnRocyArIGNob3Nlbl9zdWJzX2xlbmd0aCwKICAgICAgICAgICAgICAgICAgIGRhdGEgPSBzdWJzY3JpcHRpb25zX3dpdGhfdGFyZ2V0W3N1YnNjcmlwdGlvbnNfd2l0aF90YXJnZXQkc2V0X3R5cGUgPT0gJ3RyYWluaW5nJyxdLCBmYW1pbHkgPSAnYmlub21pYWwnKQpgYGAKCgpN
b2RlbCB2YWxpZGF0aW9uIGZvciB0cmFpbmluZyAoMjAxNy0wMS0wMSAtIDIwMTctMDgtMDEpIGFuZCB2YWxpZGF0aW9uICgyMDE3LTA5LTAxIC0gMjAxOC0wMS0wMSkgc2V0czoKCiogU3VtbWFyeSB0YWJsZSBjb250YWluaW5nCiAgICArIE51bWJlciBvZiBvYnNlcnZhdGlvbnMgd2l0aG91dCBwcmVkaWN0aW9uCiAgICArIEFVQywgbG9nbG9zcyAtIHByZWRpY3Rpb24gcXVhbGl0eSBtZXRyaWNzCiogUk9DIGN1cnZlCiogUGxvdHMgcGVyIG1hcmtldAogICAgKyBBZ2Ugb2YgY3VzdG9tZXJzIHZzIHJlYWwgYW5kIHByZWRpY3RlZCBwcm9iYWJpbGl0eSBvZiBjaHVybiBmb3IgZGlmZmVyZW50IHN1YnNjcmlwdGlvbiBsZW5ndGhzLiBJdCBzaG93cyBpZiB3ZSBhcmUgY29ycmVjdGx5IHByZWRpY3RpbmcgcHJvYmFiaWxpdHkgb2YgY2h1cm4gZm9yIGN1c3RvbWVycyBvdmVyIGxpZmV0aW1lLgogICAgKyBDYWxpYnJhdGlvbiAtIFByZWRpY3RlZCBwcm9iYWJpbGl0eSBvZiBjaHVybiB2cyByZWFsIHByb2JhYmlsaXR5IG9mIGNodXJuIGZvciBkaWZmZXJlbnQgc3Vic2NyaXB0aW9uIGxlbmd0aHMgKHdlbGwgY2FsaWJyYXRlZCBwcmVkaWN0aW9uIHNob3VsZCBmb3JtIGEgZGlhZ29uYWwgbGluZSkuIFNob3dzIGlmIG91dGNvbWUgb2YgbW9kZWwgaW4gcXVlc3Rpb24gY2FuIGJlIHJlYWx5IHRyZWF0ZWQgYXMgcHJvYmFiaWxpdHkuCmBgYHtyIHdhcm5pbmc9Rn0KcHJlZGljdGlvbl90YWJsZV9sb2dpdCA8LSB2YWxpZGF0aW9uKHN1YnNjcmlwdGlvbnNfd2l0aF90YXJnZXQsIG1vZGVsX2xvZ2l0KQpgYGAKCmBgYHtyIGZpZy5oZWlnaHQ9MTAsIGZpZy53aWR0aD03LCB3YXJuaW5nPUZ9CnZhbGlkYXRpb25fcGxvdHMocHJlZGljdGlvbl90YWJsZV9sb2dpdCwgbWluaW1hbF9zaGFyZSA9IDAuMDEpCmBgYA==